{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第七章 贝叶斯分类器"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.1 试使用极大似然法估算西瓜数据集 3.0 中前3个属性的类条件概率。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:37.831758Z",
     "start_time": "2020-02-06T13:36:33.924387Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:38.063353Z",
     "start_time": "2020-02-06T13:36:37.938522Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "read_path = 'D:\\JWE\\Files\\Courses\\数据挖掘\\melon_data.csv'\n",
    "melon_data = pd.read_csv(read_path)\n",
    "data0 = melon_data.copy() \n",
    "data0.loc[data0['好瓜'] == '是',['好瓜']] = '好瓜'\n",
    "data0.loc[data0['好瓜'] == '否',['好瓜']] = '坏瓜'\n",
    "character_dict = {character:attr for character,attr in zip(data0.columns.drop(['编号','好瓜']),[set(data0.loc[:,[each]].values.flatten()) for each in data0.columns.drop(['编号','好瓜'])])}\n",
    "discrete_dict = {character:attr for character,attr in zip(data0.columns.drop(['密度','含糖率','编号','好瓜']),[set(data0.loc[:,[each]].values.flatten()) for each in data0.columns.drop(['密度','含糖率','编号','好瓜'])])}\n",
    "continuous_dict = {character:character_dict[character] for character in character_dict.keys()-discrete_dict.keys()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:38.372174Z",
     "start_time": "2020-02-06T13:36:38.326050Z"
    },
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>编号</th>\n",
       "      <th>色泽</th>\n",
       "      <th>根蒂</th>\n",
       "      <th>敲声</th>\n",
       "      <th>纹理</th>\n",
       "      <th>脐部</th>\n",
       "      <th>触感</th>\n",
       "      <th>密度</th>\n",
       "      <th>含糖率</th>\n",
       "      <th>好瓜</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.697</td>\n",
       "      <td>0.460</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.774</td>\n",
       "      <td>0.376</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.634</td>\n",
       "      <td>0.264</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.608</td>\n",
       "      <td>0.318</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>浅白</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.556</td>\n",
       "      <td>0.215</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>青绿</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.403</td>\n",
       "      <td>0.237</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.481</td>\n",
       "      <td>0.149</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.437</td>\n",
       "      <td>0.211</td>\n",
       "      <td>好瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.666</td>\n",
       "      <td>0.091</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>青绿</td>\n",
       "      <td>硬挺</td>\n",
       "      <td>清脆</td>\n",
       "      <td>清晰</td>\n",
       "      <td>平坦</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.243</td>\n",
       "      <td>0.267</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>浅白</td>\n",
       "      <td>硬挺</td>\n",
       "      <td>清脆</td>\n",
       "      <td>模糊</td>\n",
       "      <td>平坦</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.245</td>\n",
       "      <td>0.057</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>浅白</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>模糊</td>\n",
       "      <td>平坦</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.343</td>\n",
       "      <td>0.099</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>青绿</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.639</td>\n",
       "      <td>0.161</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>浅白</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.657</td>\n",
       "      <td>0.198</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>乌黑</td>\n",
       "      <td>稍蜷</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>软粘</td>\n",
       "      <td>0.360</td>\n",
       "      <td>0.370</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>浅白</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>模糊</td>\n",
       "      <td>平坦</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.593</td>\n",
       "      <td>0.042</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>沉闷</td>\n",
       "      <td>稍糊</td>\n",
       "      <td>稍凹</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.719</td>\n",
       "      <td>0.103</td>\n",
       "      <td>坏瓜</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    编号  色泽  根蒂  敲声  纹理  脐部  触感     密度    含糖率  好瓜\n",
       "0    1  青绿  蜷缩  浊响  清晰  凹陷  硬滑  0.697  0.460  好瓜\n",
       "1    2  乌黑  蜷缩  沉闷  清晰  凹陷  硬滑  0.774  0.376  好瓜\n",
       "2    3  乌黑  蜷缩  浊响  清晰  凹陷  硬滑  0.634  0.264  好瓜\n",
       "3    4  青绿  蜷缩  沉闷  清晰  凹陷  硬滑  0.608  0.318  好瓜\n",
       "4    5  浅白  蜷缩  浊响  清晰  凹陷  硬滑  0.556  0.215  好瓜\n",
       "5    6  青绿  稍蜷  浊响  清晰  稍凹  软粘  0.403  0.237  好瓜\n",
       "6    7  乌黑  稍蜷  浊响  稍糊  稍凹  软粘  0.481  0.149  好瓜\n",
       "7    8  乌黑  稍蜷  浊响  清晰  稍凹  硬滑  0.437  0.211  好瓜\n",
       "8    9  乌黑  稍蜷  沉闷  稍糊  稍凹  硬滑  0.666  0.091  坏瓜\n",
       "9   10  青绿  硬挺  清脆  清晰  平坦  软粘  0.243  0.267  坏瓜\n",
       "10  11  浅白  硬挺  清脆  模糊  平坦  硬滑  0.245  0.057  坏瓜\n",
       "11  12  浅白  蜷缩  浊响  模糊  平坦  软粘  0.343  0.099  坏瓜\n",
       "12  13  青绿  稍蜷  浊响  稍糊  凹陷  硬滑  0.639  0.161  坏瓜\n",
       "13  14  浅白  稍蜷  沉闷  稍糊  凹陷  硬滑  0.657  0.198  坏瓜\n",
       "14  15  乌黑  稍蜷  浊响  清晰  稍凹  软粘  0.360  0.370  坏瓜\n",
       "15  16  浅白  蜷缩  浊响  模糊  平坦  硬滑  0.593  0.042  坏瓜\n",
       "16  17  青绿  蜷缩  沉闷  稍糊  稍凹  硬滑  0.719  0.103  坏瓜"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "{'色泽': {'乌黑', '浅白', '青绿'},\n",
       " '根蒂': {'硬挺', '稍蜷', '蜷缩'},\n",
       " '敲声': {'沉闷', '浊响', '清脆'},\n",
       " '纹理': {'模糊', '清晰', '稍糊'},\n",
       " '脐部': {'凹陷', '平坦', '稍凹'},\n",
       " '触感': {'硬滑', '软粘'}}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data0\n",
    "discrete_dict\n",
    "# continuous_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:38.737645Z",
     "start_time": "2020-02-06T13:36:38.591757Z"
    },
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'色泽=青绿|坏瓜': 3,\n",
       " '色泽=乌黑|坏瓜': 2,\n",
       " '色泽=浅白|坏瓜': 4,\n",
       " '根蒂=硬挺|坏瓜': 2,\n",
       " '根蒂=蜷缩|坏瓜': 3,\n",
       " '根蒂=稍蜷|坏瓜': 4,\n",
       " '敲声=沉闷|坏瓜': 3,\n",
       " '敲声=浊响|坏瓜': 4,\n",
       " '敲声=清脆|坏瓜': 2,\n",
       " '纹理=清晰|坏瓜': 2,\n",
       " '纹理=模糊|坏瓜': 3,\n",
       " '纹理=稍糊|坏瓜': 4,\n",
       " '脐部=凹陷|坏瓜': 2,\n",
       " '脐部=平坦|坏瓜': 4,\n",
       " '脐部=稍凹|坏瓜': 3,\n",
       " '触感=软粘|坏瓜': 3,\n",
       " '触感=硬滑|坏瓜': 6,\n",
       " '色泽=青绿|好瓜': 3,\n",
       " '色泽=乌黑|好瓜': 4,\n",
       " '色泽=浅白|好瓜': 1,\n",
       " '根蒂=硬挺|好瓜': 0,\n",
       " '根蒂=蜷缩|好瓜': 5,\n",
       " '根蒂=稍蜷|好瓜': 3,\n",
       " '敲声=沉闷|好瓜': 2,\n",
       " '敲声=浊响|好瓜': 6,\n",
       " '敲声=清脆|好瓜': 0,\n",
       " '纹理=清晰|好瓜': 7,\n",
       " '纹理=模糊|好瓜': 0,\n",
       " '纹理=稍糊|好瓜': 1,\n",
       " '脐部=凹陷|好瓜': 5,\n",
       " '脐部=平坦|好瓜': 0,\n",
       " '脐部=稍凹|好瓜': 3,\n",
       " '触感=软粘|好瓜': 2,\n",
       " '触感=硬滑|好瓜': 6}"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bys_dict = {f'{character}={attr}|{classi}':len(data0.loc[(data0[character]==attr)&(data0['好瓜']==classi),:]) for classi in set(data0.loc[:,['好瓜']].values.flatten()) for character in list(discrete_dict.keys()) for attr in discrete_dict[character]}\n",
    "bys_dict\n",
    "# len(bys_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "这里只计算第一个属性$(色泽=attr|好瓜)$的类条件概率，假设该属性条件分布如下\n",
    "$$\n",
    "P(色泽=浅白|好瓜=是) = p_{1}\\\\\n",
    "P(色泽=乌黑|好瓜=是) = p_{2}\\\\\n",
    "P(色泽=青绿|好瓜=是) = p_{3} = 1 - p_{1} - p_{2}\\\\\n",
    "$$\n",
    "该属性各取值的条件概率极大似然函数为\n",
    "$$\n",
    "L \n",
    "= P(色泽=浅白|好瓜=是)^{1} \\cdot P(色泽=乌黑|好瓜=是)^{4} \\cdot P(色泽=青绿|好瓜=是)^{3} \n",
    "= p_1^1 \\cdot p_2^4 \\cdot (1 - p_1 - p_2)^3\n",
    "$$\n",
    "取对数，求偏导，找极值\n",
    "$$\n",
    "令\n",
    "\\begin{cases}\n",
    "\\frac{\\partial ln(L)}{\\partial p_1} = 0 \\\\\n",
    "\\frac{\\partial ln(L)}{\\partial p_2} = 0 \\\\\n",
    "\\end{cases}\n",
    "$$\n",
    "求得\n",
    "$$\n",
    "p_1 = \\frac{1}{8} \\\\\n",
    "p_2 = \\frac{1}{2} \\\\\n",
    "p_3 = \\frac{3}{8} \\\\\n",
    "$$"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.2.试证明：条件独立性假设不成立时，朴素贝叶斯分类器任有可能产生最优分类器。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "假设条件独立性不成立，朴素贝叶斯分类器仍按条件独立进行。\n",
    "\n",
    "此时若数据集无冲突数据，且具有相同属性取值的样本类别相同，不同属性取值的样本类别不同，则朴素贝叶斯分类器仍产生最优分类器。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.3.试编程实现拉普拉斯修正的朴素贝叶斯分类器，并以西瓜数据集3.0为训练集，并对“测1”样本进行分类。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "沿用上面代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:39.025911Z",
     "start_time": "2020-02-06T13:36:38.992824Z"
    },
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>色泽</th>\n",
       "      <th>根蒂</th>\n",
       "      <th>敲声</th>\n",
       "      <th>纹理</th>\n",
       "      <th>脐部</th>\n",
       "      <th>触感</th>\n",
       "      <th>密度</th>\n",
       "      <th>含糖率</th>\n",
       "      <th>好瓜</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>青绿</td>\n",
       "      <td>蜷缩</td>\n",
       "      <td>浊响</td>\n",
       "      <td>清晰</td>\n",
       "      <td>凹陷</td>\n",
       "      <td>硬滑</td>\n",
       "      <td>0.697</td>\n",
       "      <td>0.46</td>\n",
       "      <td>？</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   色泽  根蒂  敲声  纹理  脐部  触感     密度   含糖率 好瓜\n",
       "0  青绿  蜷缩  浊响  清晰  凹陷  硬滑  0.697  0.46  ？"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# data0\n",
    "# discrete_dict\n",
    "# continuous_dict\n",
    "test_dict = {'色泽':['青绿'],'根蒂':['蜷缩'],'敲声':['浊响'],'纹理':['清晰'],'脐部':['凹陷'],'触感':['硬滑'],'密度':[0.697],'含糖率':[0.460],'好瓜':['？']}\n",
    "test1 = pd.DataFrame(data=test_dict)\n",
    "test1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:39.304651Z",
     "start_time": "2020-02-06T13:36:39.274071Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def Normpdf(x,mu=0,sigma=1):\n",
    "    return np.exp(-((x - mu)**2)/(2*sigma**2))/(np.sqrt(2*np.pi)*sigma)\n",
    "\n",
    "def SimpleBayes(data=data0,simple=test1):\n",
    "    simple_dict = {f'{character}':f'{character}={attr}' for character,attr in zip(simple.columns.drop(['好瓜']),simple.drop(['好瓜'],axis=1).values.flatten())}\n",
    "    simpleD_dict = {f'{character}':f'{character}={attr}' for character,attr in zip(simple.columns.drop(['密度','含糖率','好瓜']),simple.drop(['密度','含糖率','好瓜'],axis=1).values.flatten())}\n",
    "    simpleC_dict = {character:simple_dict[character] for character in simple_dict.keys()-simpleD_dict.keys()}\n",
    "    character_dict = {character:attr for character,attr in zip(data.columns.drop(['编号','好瓜']),[set(data.loc[:,[each]].values.flatten()) for each in data.columns.drop(['编号','好瓜'])])}\n",
    "    discrete_dict = {character:attr for character,attr in zip(data.columns.drop(['密度','含糖率','编号','好瓜']),[set(data.loc[:,[each]].values.flatten()) for each in data.columns.drop(['密度','含糖率','编号','好瓜'])])}\n",
    "    continuous_dict = {character:character_dict[character] for character in character_dict.keys()-discrete_dict.keys()}\n",
    "    bayesD_dict = {}\n",
    "    bayesC_dict = {}\n",
    "    for classi in set(data.loc[:,['好瓜']].values.flatten()):\n",
    "        bayesD_dict[classi] = {f'{character}={attr}':(len(data.loc[(data[character]==attr)&(data['好瓜']==classi),:])+1)/(len(data.loc[data['好瓜']==classi,:])+len(discrete_dict[character])) for character in list(discrete_dict.keys()) for attr in discrete_dict[character]}\n",
    "        bayesC_dict[classi] = {f'{character}={attr}':Normpdf(attr,float(data.loc[data['好瓜']==classi,[character]].mean().values),float(data.loc[data['好瓜']==classi,[character]].std().values)) for character,attr in zip(continuous_dict.keys(),simple.loc[:,['密度','含糖率']].values.flatten())}\n",
    "#     bayes_dict = dict(bayesD_dict,**bayesC_dict)\n",
    "    PT = (len(data.loc[data['好瓜']=='好瓜',:])+1)/(len(data)+len(set(data['好瓜'].values.flatten())))\n",
    "    PF = (len(data.loc[data['好瓜']=='坏瓜',:])+1)/(len(data)+len(set(data['好瓜'].values.flatten())))\n",
    "    for index in simpleD_dict.keys():\n",
    "        PT *= bayesD_dict['好瓜'][simpleD_dict[index]]\n",
    "    for index in simpleC_dict.keys():\n",
    "        PT *= bayesC_dict['好瓜'][simpleC_dict[index]]\n",
    "    for index in simpleD_dict.keys():\n",
    "        PF *= bayesD_dict['坏瓜'][simpleD_dict[index]]\n",
    "    for index in simpleC_dict.keys():\n",
    "        PF *= bayesC_dict['坏瓜'][simpleC_dict[index]]\n",
    "    MLE_dict = {'好瓜':PT,'坏瓜':PF}\n",
    "#     print(MLE_dict)\n",
    "    result = max(MLE_dict,key=MLE_dict.get)\n",
    "    return result,MLE_dict\n",
    "#     return bayesD_dict,bayesC_dict,simple_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-02-06T13:36:39.628010Z",
     "start_time": "2020-02-06T13:36:39.494666Z"
    },
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('好瓜', {'好瓜': 0.025631024529740677, '坏瓜': 7.722360621178051e-05})"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "SimpleBayes(data0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.4.实践中用式(7.15)决定分类类别时，若数据的维度非常高，则连乘的概率结果会非常接近0并导致下溢。试述防止下溢的可能方案。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "把概率取对数，则连乘变为连加。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.5.试证明:二分类任务中两类数据满足高斯分布且方差相同时，线性判别分析产生最优贝叶斯分类器。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "日后再补"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.6.试编程实现AODE分类器，并以西瓜数据集3.0为训练集，并对“测1”样本进行分类。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "日后再补"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.7.给定d个二值属性的分类任务，假设对于任何先验概率的估算需要30个样本。试估计AODE中估算先验概率p(c,xi)所需要的样本数。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "日后再补"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.8.考虑图7.3，证明：在同父结构中，若x1的取值未知，则x3⊥x4不成立。在顺序结构中，y⊥z|x成立，但y⊥z不成立。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "日后再补"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.9 以西瓜数据集 2.0 为训练集，试基于 BIC 准则构建一个贝叶斯网。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "日后再补"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 7.10 以西瓜数据集 2.0 中属性\"脐部\"为隐变量，试基于 EM 算法构建一个贝叶斯网。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "日后再补"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
